<!-- BibBase embed (JavaScript method): renders the Zotero group collection as a
     publication list, grouped by abbreviated author. Note: query params were
     previously garbled ("jsonp%1", "group0%author_short") — "=" restored, and
     the duplicate jsonp param removed; "&" is escaped as &amp; per HTML rules. -->
<script src="https://bibbase.org/show?bib=https%3A%2F%2Fapi.zotero.org%2Fgroups%2F2386895%2Fcollections%2F4YE3UGQK%2Fitems%3Fformat%3Dbibtex%26limit%3D100&amp;jsonp=1&amp;group0=author_short"></script>
<?php
// BibBase embed (PHP method): fetch the server-rendered publication list and
// inline it into this page. Query params were previously garbled
// ("jsonp%1", "group0%author_short") — "=" restored.
$bibbaseUrl = "https://bibbase.org/show?bib=https%3A%2F%2Fapi.zotero.org%2Fgroups%2F2386895%2Fcollections%2F4YE3UGQK%2Fitems%3Fformat%3Dbibtex%26limit%3D100&jsonp=1&group0=author_short";
$contents = file_get_contents($bibbaseUrl);
if ($contents === false) {
    // file_get_contents() returns false on failure; don't emit bogus output.
    echo "<!-- BibBase publication list could not be loaded -->";
} else {
    // $contents is already HTML produced by BibBase; echo it verbatim
    // (print_r is for debugging array/object structures, not page output).
    echo $contents;
}
?>
<!-- BibBase embed (iframe method). Fixes: restored "=" in the garbled query
     params ("jsonp%1", "group0%author_short"), escaped "&" as &amp;, and added
     the required title attribute for accessibility. -->
<iframe src="https://bibbase.org/show?bib=https%3A%2F%2Fapi.zotero.org%2Fgroups%2F2386895%2Fcollections%2F4YE3UGQK%2Fitems%3Fformat%3Dbibtex%26limit%3D100&amp;jsonp=1&amp;group0=author_short" title="Publication list rendered by BibBase"></iframe>
For more details see the documentation.
To the site owner:
Action required! Mendeley is changing its API. In order to keep using Mendeley with BibBase past April 14th, you need to:
@inproceedings{neudecker_survey_2021, address = {Lausanne Switzerland}, title = {A survey of {OCR} evaluation tools and metrics}, isbn = {978-1-4503-8690-6}, url = {https://dl.acm.org/doi/10.1145/3476887.3476888}, doi = {10.1145/3476887.3476888}, abstract = {The millions of pages of historical documents that are digitized in libraries are increasingly used in contexts that have more specific requirements for OCR quality than keyword search. How to comprehensively, efficiently and reliably assess the quality of OCR results against the background of mass digitization, when ground truth can only ever be produced for very small numbers? Due to gaps in specifications, results from OCR evaluation tools can return different results, and due to differences in implementation, even commonly used error rates are often not directly comparable. OCR evaluation metrics and sampling methods are also not sufficient where they do not take into account the accuracy of layout analysis, since for advanced use cases like Natural Language Processing or the Digital Humanities, accurate layout analysis and detection of the reading order are crucial. We provide an overview of OCR evaluation metrics and tools, describe two advanced use cases for OCR results, and perform an OCR evaluation experiment with multiple evaluation tools and different metrics for two distinct datasets. We analyze the differences and commonalities in light of the presented use cases and suggest areas for future work.}, language = {en}, urldate = {2023-06-21}, booktitle = {The 6th {International} {Workshop} on {Historical} {Document} {Imaging} and {Processing}}, publisher = {ACM}, author = {Neudecker, Clemens and Baierer, Konstantin and Gerber, Mike and Clausner, Christian and Antonacopoulos, Apostolos and Pletschacher, Stefan}, month = sep, year = {2021}, pages = {13--18}, }
@article{tomarchio_mapping_2019, title = {Mapping {Human} {Landscapes} in {Muscat}, {Oman}, with {Social} {Media} {Data}}, copyright = {Creative Commons Attribution Share Alike 4.0 International, info:eu-repo/semantics/openAccess}, url = {http://hdl.handle.net/20.500.11850/339868}, doi = {10.3929/ETHZ-B-000339868}, abstract = {The paper presents a mapping process to define activity patterns and reveal the localisation of different city usersin Muscat, Oman, using social media data. The paper has two aims: to present a methodology to map activity patterns in the city in the Omani context, using social media data; to interpret the data and extract valuable narratives for the case study of Muscat. As various social media have penetrated into the daily life of people, these become one important and effective data source to understand how people use the spaces of the city. There is a series of questions related to big data and urban space that emerge such as: can social media data be “mined” in Muscat, Oman, to create design-relevant spatial information? What information about the use of urban space in the context of an Arab city can be extracted from social media data? The case study deals with Muscat, the capital of Oman, a city with peculiar socio-demographic, cultural aspects, influencing the use of the space, particularly when relating to open and public spaces. The proposed study uses data extracted from Twitter and Instagram to perform an analysis of the city of Muscat: The analysis looks at three scales and presents four thematic layers: one layer of generally finding hotspots of activities; two layers of investigating different patterns of activities during the day-night, weekdays-weekends and one layer of looking into the languages spoken in different areas of the city. This results in the mapping of how different sociallinguistic groups possibly move and interact in Muscat. 
The first part of the paper will present the methodology, from data collection to visualisation. The second part will look in detail at some selected areas and exemplify the narrative so that planners and designers can extract data from this approach and methodology.}, language = {en}, urldate = {2021-03-03}, author = {Tomarchio, Ludovica}, year = {2019}, note = {Artwork Size: 38 p. Medium: application/pdf Publisher: ETH Zurich}, keywords = {Digital Humanities, HUMAN GEOGRAPHY, Oman (South West Asia). Sultanat of Oman, social media}, pages = {38 p.}, }
@misc{dejean_icdar_2019, title = {{ICDAR} 2019 {Competition} on {Table} {Detection} and {Recognition} ({cTDaR})}, url = {https://zenodo.org/record/3239032#.X1IyZdbgqrI}, doi = {10.5281/zenodo.3239032}, abstract = {The aim of this competition is to evaluate the performance of state of the art methods for table detection (TRACK A) and table recognition (TRACK B). For the first track, document images containing one or several tables are provided. For TRACK B two subtracks exist: the first subtrack (B.1) provides the table region. Thus, only the table structure recognition must be performed. The second subtrack (B.2) provides no a-priori information. This means, the table region and table structure detection has to be done. The Ground Truth is provided in a similar format as for the ICDAR 2013 competition (see [2]): {\textless}?xml version="1.0" encoding="UTF-8"?{\textgreater} {\textless}document filename='filename.jpg'{\textgreater} {\textless}table id='Table\_1540517170416\_3'{\textgreater} {\textless}Coords points="180,160 4354,160 4354,3287 180,3287"/{\textgreater} {\textless}cell id='TableCell\_1540517477147\_58' start-row='0' start-col='0' end-row='1' end-col='2'{\textgreater} {\textless}Coords points="180,160 177,456 614,456 615,163"/{\textgreater} {\textless}/cell{\textgreater} ... {\textless}/table{\textgreater} ... {\textless}/document{\textgreater} The difference to Gobel et al. [2] is the Coords tag which defines a table/cell as a polygon specified by a list of coordinates. For B.1 the table and its coordinates is given together with the input image. Important Note: For the modern dataset, the convex hull of the content describes a cell region. For the historical dataset, it is requested that the output region of a cell is the cell boundary. This is necessary due to the characteristics of handwritten text, which is often overlapping with different cells. 
See also: http://sac.founderit.com/tasks.html The evaluation tool is available at github: https://github.com/cndplab-founder/ctdar\_measurement\_tool}, urldate = {2020-09-04}, publisher = {Zenodo}, author = {Déjean, Hervé and Meunier, Jean-Luc and Gao, Liangcai and Huang, Yilun and Fang, Yu and Kleber, Florian and Lang, Eva-Maria}, month = apr, year = {2019}, }
@inproceedings{straus_icfhr2018_2018, title = {{ICFHR2018} {Competition} on {Automated} {Text} {Recognition} on a {READ} {Dataset}}, doi = {10.1109/ICFHR-2018.2018.00089}, abstract = {We summarize the results of a competition on Automated Text Recognition targeting the effective adaptation of recognition engines to essentially new data. The task consists in achieving a minimum character error rate on a previously unknown text corpus from which only a few pages are available for adjusting an already pre-trained recognition engine. This issue addresses a frequent application scenario where only a small amount of task-specific training data is available, because producing this data usually requires much effort. We present the results of five submission. They show that the task is a challenging issue but for certain documents 16 pages of transcription are sufficient to adapt a pre-trained recognition system.}, booktitle = {2018 16th {International} {Conference} on {Frontiers} in {Handwriting} {Recognition} ({ICFHR})}, author = {Strauß, Tobias and Leifert, Gundram and Labahn, Roger and Hodel, Tobias and Mühlberger, Günter}, month = aug, year = {2018}, keywords = {Computational modeling, Data models, Optical imaging, Task analysis, Text recognition, Training, Training data, automated text recognition, fast adaptation, few shot learning, historical documents}, pages = {477--482}, }
@article{strauss_system_2018, title = {System {Description} of {CITlab}'s {Recognition} \& {Retrieval} {Engine} for {ICDAR2017} {Competition} on {Information} {Extraction} in {Historical} {Handwritten} {Records}}, volume = {abs/1804.09943}, url = {http://arxiv.org/abs/1804.09943}, urldate = {2018-06-29}, journal = {CoRR}, author = {Strauss, Tobias and Weidemann, Max and Michael, Johannes and Leifert, Gundram and Grüning, Tobias and Labahn, Roger}, year = {2018}, }
@book{flanders_shape_2018, address = {Abingdon, Oxon ; New York, NY : Routledge, 2019. {\textbar} Series: Digital research in the arts and humanities}, edition = {1}, title = {The {Shape} of {Data} in the {Digital} {Humanities}: {Modeling} {Texts} and {Text}-based {Resources}}, isbn = {978-1-315-55294-1}, shorttitle = {The {Shape} of {Data} in the {Digital} {Humanities}}, url = {https://www.taylorfrancis.com/books/9781317016151}, language = {en}, urldate = {2020-01-14}, publisher = {Routledge}, editor = {Flanders, Julia and Jannidis, Fotis}, month = nov, year = {2018}, doi = {10.4324/9781315552941}, }
@inproceedings{gruning_read-bad:_2018, title = {{READ}-{BAD}: {A} {New} {Dataset} and {Evaluation} {Scheme} for {Baseline} {Detection} in {Archival} {Documents}}, isbn = {978-1-5386-3346-5}, shorttitle = {{READ}-{BAD}}, url = {http://doi.ieeecomputersociety.org/10.1109/DAS.2018.38}, doi = {10.1109/DAS.2018.38}, urldate = {2018-06-29}, booktitle = {13th {IAPR} {International} {Workshop} on {Document} {Analysis} {Systems}, {DAS} 2018, {Vienna}, {Austria}, {April} 24-27, 2018}, publisher = {IEEE Computer Society}, author = {Grüning, Tobias and Labahn, Roger and Diem, Markus and Kleber, Florian and Fiel, Stefan}, year = {2018}, pages = {351--356}, }
@article{tompkins_editorial_2016, title = {Editorial {Comment}: {Theatre}, the {Digital}, and the {Analysis} and {Documentation} of {Performance}}, volume = {68}, issn = {1086-332X}, shorttitle = {Editorial {Comment}}, url = {https://muse.jhu.edu/article/645393}, doi = {10.1353/tj.2016.0103}, abstract = {Throughout its history, theatre has capitalized on advances in technology, from shifts in lighting practices, to the development of machinery for creating special effects, to the advent of multimedia in contemporary performance, and beyond. The authors demystify digital humanities methodologies for theatre and performance research/researchers by demonstrating that theatre history has always employed quantitative, bigger picture approaches in addition to close readings of performances, and that digital technologies facilitate the analysis of larger datasets more effectively.}, language = {en}, number = {4}, urldate = {2020-02-24}, journal = {Theatre Journal}, author = {Tompkins, Joanne}, year = {2016}, pages = {xi--xiv}, }
@book{dixon_digital_2015, address = {Cambridge, Massachusetts, London, England}, edition = {[Paperback edition]}, series = {Leonardo}, title = {Digital performance : a history of new media in theater, dance, performance art, and installation}, isbn = {978-0-262-52752-1}, shorttitle = {Digital performance}, publisher = {The MIT Press}, author = {Dixon, Steve and Smith, Barry}, year = {2015}, }
@incollection{stamatopoulos_nikolaos_goal-oriented_2015, title = {Goal-oriented performance evaluation methodology for page segmentation techniques}, booktitle = {Proceedings of the 13th international conference on document analysis and recognition ({ICDAR})}, author = {Stamatopoulos, Nikolaos and Gatos, Basilis}, year = {2015}, pages = {281--285}, }
@inproceedings{pletschacher_page_2010, title = {The {PAGE} ({Page} {Analysis} and {Ground}-{Truth} {Elements}) {Format} {Framework}}, isbn = {978-1-4244-7542-1}, url = {http://ieeexplore.ieee.org/document/5597587/}, doi = {10.1109/ICPR.2010.72}, urldate = {2018-06-22}, publisher = {IEEE}, author = {Pletschacher, Stefan and Antonacopoulos, Apostolos}, month = aug, year = {2010}, pages = {257--260}, }
@misc{noauthor_stts_1999, title = {{STTS} {Tag} {Table}. {Institut} für {Maschinelle} {Sprachverarbeitung}. {Universität} {Stuttgart}}, shorttitle = {{STTS}}, url = {http://www.ims.uni-stuttgart.de/forschung/ressourcen/lexika/TagSets/stts-table.html}, urldate = {2014-07-29}, journal = {STTS Tag Table (1995/1999)}, year = {1999}, }